# Configuration naming three Common Crawl WARC components (a downloader, an
# iterator and an extractor) by dotted path, each paired with a parameter
# mapping, followed by a `format` mapping of field names to type names.
# NOTE(review): the code that loads this file is not visible here; how each
# key is consumed is inferred from its name -- confirm against the loader.
# Dotted path to the downloader class.
download_module: nemo_curator.download.commoncrawl.CommonCrawlWARCDownloader
# Empty mapping: no downloader parameters are set in this file.
# Presumably forwarded as constructor keyword arguments -- TODO confirm.
download_params: {}
# Dotted path to the iterator class.
iterator_module: nemo_curator.download.commoncrawl.CommonCrawlWARCIterator
# Empty mapping: no iterator parameters are set in this file.
iterator_params: {}
# Dotted path to the extractor class.
extract_module: nemo_curator.download.commoncrawl.CommonCrawlWARCExtractor
# Empty mapping: no extractor parameters are set in this file.
extract_params: {}
# Field name -> type name; every field listed here is declared as `str`.
# Presumably the schema of the records this pipeline emits -- TODO confirm.
format:
  text: str
  language: str
  url: str
  warc_id: str
  source_id: str
